Data

Time series of monthly maximum rainfall [mm] in a day

library(xts)
library(zoo)
library(tseries) 
library(copula)
setwd("/home/davide/università/tesi magistrale/dati/arpa")
load("only_rain.RData")
print(coordinates)
##                   station alt      lat     long
## 1                Brugnera  22 45.91792 12.54500
## 2      Capriva.del.Friuli  85 45.95809 13.51233
## 3   Cervignano.del.Friuli   8 45.84949 13.33701
## 4     Cividale.del.Friuli 127 46.08044 13.42001
## 5                Codroipo  37 45.95236 13.00274
## 6                Enemonzo 438 46.41042 12.86254
## 7                 Fagagna 148 46.10169 13.07389
## 8                Fossalon   0 45.71477 13.45886
## 9       Gemona.del.Friuli 184 46.26130 13.12209
## 10         Gradisca.d.Is.  29 45.88979 13.48181
## 11                   Musi 600 46.31266 13.27468
## 12 Palazzolo.dello.Stella   5 45.80572 13.05260
## 13       San.Vito.al.Tgl.  21 45.89566 12.81499
## 14         Sgonico.Zgonik 268 45.73800 13.74206
## 15             Talmassons  16 45.88231 13.15779
## 16               Tarvisio 794 46.51078 13.55189
## 17             Udine.S.O.  91 46.03521 13.22667
## 18                 Vivaro 142 46.07653 12.76881
knitr::include_graphics("mappa_stazioni_rain.png")

#in separate plots
trace(plot.zoo, 
      quote(mtext <- function(...) graphics::mtext(..., cex = 0.5, las = 1)))
## [1] "plot.zoo"
plot.zoo(clean_rain_xts, oma = c(6, 5, 5, 0))
## Tracing plot.zoo(clean_rain_xts, oma = c(6, 5, 5, 0)) on entry

untrace(plot.zoo)

Serial independence test

Acf plots and ljung box test

par(mfrow=c(2,3))  #soluzione temporanea con più plot
for (x in colnames(clean_rain_xts)){
  acf(ts(clean_rain_xts[,x]), main=x)    #plot of the ACF
}

#round function not working
for (x in colnames(clean_rain_xts)){
  print(paste0("p-value of Ljung-Box test on rain in ", x , " is:", Box.test(clean_rain_xts[,x], lag = 20,      type = "Ljung-Box")$p.value))
}
## [1] "p-value of Ljung-Box test on rain in Brugnera is:0.131982372909309"
## [1] "p-value of Ljung-Box test on rain in Capriva.del.Friuli is:0.342420393409757"
## [1] "p-value of Ljung-Box test on rain in Cervignano.del.Friuli is:0.764437631764927"
## [1] "p-value of Ljung-Box test on rain in Cividale.del.Friuli is:0.298603616354461"
## [1] "p-value of Ljung-Box test on rain in Codroipo is:0.194210840006664"
## [1] "p-value of Ljung-Box test on rain in Enemonzo is:0.0252374442208957"
## [1] "p-value of Ljung-Box test on rain in Fagagna is:0.406266394402289"
## [1] "p-value of Ljung-Box test on rain in Fossalon is:0.325963162941074"
## [1] "p-value of Ljung-Box test on rain in Gemona.del.Friuli is:0.023805779326967"
## [1] "p-value of Ljung-Box test on rain in Gradisca.d.Is. is:0.280495475660849"
## [1] "p-value of Ljung-Box test on rain in Musi is:0.00475603287750948"
## [1] "p-value of Ljung-Box test on rain in Palazzolo.dello.Stella is:0.934749836668725"
## [1] "p-value of Ljung-Box test on rain in San.Vito.al.Tgl. is:0.868452009714319"
## [1] "p-value of Ljung-Box test on rain in Sgonico.Zgonik is:0.840696071596907"
## [1] "p-value of Ljung-Box test on rain in Talmassons is:0.909658489009452"
## [1] "p-value of Ljung-Box test on rain in Tarvisio is:2.14358588460639e-05"
## [1] "p-value of Ljung-Box test on rain in Udine.S.O. is:0.253189905807489"
## [1] "p-value of Ljung-Box test on rain in Vivaro is:0.335901375480045"
for (x in colnames(clean_rain_xts)){
  print(paste0("p-value of Ljung-Box test on squared rain in ", x , " is:", Box.test(clean_rain_xts[, x]^2, lag = 20,   type = "Ljung-Box")$p.value))
}
## [1] "p-value of Ljung-Box test on squared rain in Brugnera is:0.163294547971732"
## [1] "p-value of Ljung-Box test on squared rain in Capriva.del.Friuli is:0.841589296245896"
## [1] "p-value of Ljung-Box test on squared rain in Cervignano.del.Friuli is:0.445652499691442"
## [1] "p-value of Ljung-Box test on squared rain in Cividale.del.Friuli is:0.926739822293949"
## [1] "p-value of Ljung-Box test on squared rain in Codroipo is:0.256656863284502"
## [1] "p-value of Ljung-Box test on squared rain in Enemonzo is:0.0704492125027978"
## [1] "p-value of Ljung-Box test on squared rain in Fagagna is:0.564285551650741"
## [1] "p-value of Ljung-Box test on squared rain in Fossalon is:0.194653546913393"
## [1] "p-value of Ljung-Box test on squared rain in Gemona.del.Friuli is:0.0215392068379517"
## [1] "p-value of Ljung-Box test on squared rain in Gradisca.d.Is. is:0.29739985117221"
## [1] "p-value of Ljung-Box test on squared rain in Musi is:0.0124915872643815"
## [1] "p-value of Ljung-Box test on squared rain in Palazzolo.dello.Stella is:0.799616200060337"
## [1] "p-value of Ljung-Box test on squared rain in San.Vito.al.Tgl. is:0.986513470431944"
## [1] "p-value of Ljung-Box test on squared rain in Sgonico.Zgonik is:0.67278247569016"
## [1] "p-value of Ljung-Box test on squared rain in Talmassons is:0.793492215144275"
## [1] "p-value of Ljung-Box test on squared rain in Tarvisio is:1.09083383346142e-05"
## [1] "p-value of Ljung-Box test on squared rain in Udine.S.O. is:0.0843148611886035"
## [1] "p-value of Ljung-Box test on squared rain in Vivaro is:0.333677758332719"

Rain pairwise scatterplot

Slight seasonality effect on some stations (Tarvisio, Gemona, Enemonzo,… ), but it can be safely ignored. The data can be used directly without passing through residuals, transformation of the data to pseudo-observations, and pairwise scatterplots.

#create the matrices of pseudo-observations (formed from original rain and deseasoned T_maxseries)
clean_rain<-as.matrix(clean_rain_xts)
colnames(clean_rain)<-colnames(clean_rain_xts)
clean_rain<-pobs(clean_rain)

pairs2(clean_rain, cex=0.6)

Heatmap rain Kendall-tau

library(heatmaply)
heatmaply_cor(corKendall(clean_rain), xlab = "Stations", 
              ylab = "Stations", dendrogram = "none", scale="none")

Exchangeability test

Exchangeability test done on the lower triangular matrix (all elements of pairs2 below the matrix diagonal) high p-value for almost all (all exchangeable)

which(as.numeric(exchangeability_test_matrix[3,]) < 0.05)
## [1]  35  56  68 112 117 118 126 127 129

The low p-value for exchangeability is for the cases:

exchangeability_test_matrix[,which(as.numeric(exchangeability_test_matrix[3,]) < 0.05)]
##      [,1]                    [,2]                     [,3]                
## [1,] "Cervignano.del.Friuli" "Cividale.del.Friuli"    "Codroipo"          
## [2,] "Codroipo"              "Palazzolo.dello.Stella" "Musi"              
## [3,] "0.00949050949050949"   "0.0134865134865135"     "0.0014985014985015"
##      [,4]                 [,5]                 [,6]                
## [1,] "Gemona.del.Friuli"  "Gemona.del.Friuli"  "Gradisca.d.Is."    
## [2,] "San.Vito.al.Tgl."   "Vivaro"             "Musi"              
## [3,] "0.0024975024975025" "0.0154845154845155" "0.0164835164835165"
##      [,7]                     [,8]                 [,9]                
## [1,] "Musi"                   "Musi"               "Musi"              
## [2,] "Palazzolo.dello.Stella" "San.Vito.al.Tgl."   "Talmassons"        
## [3,] "0.0474525474525475"     "0.0044955044955045" "0.0104895104895105"

Looking at data it seems that it is a problem of not enough data rather than non-exchangeability

Radial symmetry test

More or less half of the pairs are radially symmetric

which(as.numeric(radial_test_matrix[3,]) < 0.05)
##  [1]   3   4   5   6   7   8  10  11  14  16  19  21  24  25  26  27  30  31  38
## [20]  41  50  53  55  57  60  63  64  66  68  73  76  81  82  84  86  89  91  92
## [39]  93  95  96 104 105 111 112 114 116 117 122 126 127 129 131 138 141 142 144
## [58] 148 150 151 153
#radial_test_matrix[,which(as.numeric(radial_test_matrix[3,]) < 0.05)]

Extreme value dependence test

low p-value for all (no extreme value dependence)

which(as.numeric(extreme_value_test_matrix[3,]) > 0.05)
## integer(0)

Kendall distribution function

diagonal independence, upper curve perfect positive dependence. On all there is strong lower tail dependence (“drought” periods are extendend to the whole area), not on all there is a strong upper tail dependence (concurrent extreme rainfall is rarer and occurrs in close stations)

library(VineCopula)
par(mfrow=c(2,3))  #soluzione temporanea con più plot
for (i in 1:(length(pairwise)/2)){ 
  BiCopKPlot(clean_rain[,pairwise[1,i]],clean_rain[,pairwise[2,i]], main = pairwise[1:2,i])
}

Lower tail dependence coefficient heatmap

heatmaply_cor(lower_tdc, xlab = "Stations", 
          ylab = "Stations", dendrogram = "none", scale="none")

Upper tail dependence coefficient heatmap

Both heatmaps confirm what seen from Kendall distribution function

heatmaply_cor(upper_tdc, xlab = "Stations", 
          ylab = "Stations", dendrogram = "none", scale="none")

Dissimilarity matrix and clustering

The lower tdc is high and similar for all couples so it is not useful for clustering. The clustering will be done using the dissimilarity matrix of the upper tdc (-log(upper_tdc)) Different methods for the clustering are tested

dissimilarity_lower<- (2*(1-lower_tdc))^0.5
dissimilarity_upper<- (2*(1-upper_tdc))^0.5

#test different clustering methods
upper_clustering_average<-hclust(d=as.dist(dissimilarity_upper), method="average")
upper_clustering_complete <- hclust(d=as.dist(dissimilarity_upper), method = "complete")

#dendrogram plots
#par(mfrow = c(2, 2))
plot(upper_clustering_average, main="Average")

plot(upper_clustering_complete, main="Complete")
abline(h=1.35, col="blue")

Using the complete method

With average there are 4 (?) groups 11)“Enemonzo”, “Tarvisio”(BLACK) 12)“Fagagna”, “Brugnera”, “Vivaro”, Gemona.del.Friuli, Musi (GREEN) 13)“Talmassons”, “Cividale.del.Friuli”, “Udine.S.O.”, “Palazzolo.dello.Stella”,“San.Vito.al.Tgl.”, “Codroipo”(BLUE) 14)“Fossalon”, “Sgonico.Zgonik”, “Gradisca.d.Is.”, “Capriva.del.Friuli”, “Cervignano.del.Friuli”(RED)

knitr::include_graphics("mappa_stazioni_rain_cluster_complete_utdc.png")

Validity check of clusters

cluster11<-clean_rain[,c("Enemonzo", "Tarvisio")]
cluster12<-clean_rain[,c("Brugnera", "Vivaro", "Fagagna", "Gemona.del.Friuli", "Musi")]  
cluster13<-clean_rain[,c("Talmassons", "Cividale.del.Friuli", "Udine.S.O.", "Palazzolo.dello.Stella","San.Vito.al.Tgl.", "Codroipo")]
cluster14<-clean_rain[,c("Fossalon", "Sgonico.Zgonik", "Gradisca.d.Is.", "Capriva.del.Friuli", "Cervignano.del.Friuli")]

Scatterplots of clusters K-plot and Heatmap of tdc in clusters:

pairs2(cluster11, main="cluster11")

heatmaply_cor(fitLambda(cluster11, lower.tail  = FALSE), xlab = "Stations", 
              ylab = "Stations", dendrogram = "none", scale="none", main="lambda cluster11")
pairwise<-combn(colnames(cluster11), 2) #this create all the combinations of the stations (no repetitions)
par(mfrow=c(2,3))  #soluzione temporanea con più plot
heatmaply_cor(corKendall(cluster11), xlab = "Stations", 
              ylab = "Stations", dendrogram = "none", scale="none", main="tau cluster11")
for (i in 1:(length(pairwise)/2)){ 
  BiCopKPlot(clean_rain[,pairwise[1,i]],clean_rain[,pairwise[2,i]], main = pairwise[1:2,i])
}

pairs2(cluster12, main="cluster12")

heatmaply_cor(fitLambda(cluster12, lower.tail  = FALSE), xlab = "Stations", 
              ylab = "Stations", dendrogram = "none", scale="none", main="lambda cluster12")
pairwise<-combn(colnames(cluster12), 2) #this create all the combinations of the stations (no repetitions)
par(mfrow=c(2,3))  #soluzione temporanea con più plot
heatmaply_cor(corKendall(cluster12), xlab = "Stations", 
              ylab = "Stations", dendrogram = "none", scale="none", main="tau cluster12")
for (i in 1:(length(pairwise)/2)){ 
  BiCopKPlot(clean_rain[,pairwise[1,i]],clean_rain[,pairwise[2,i]], main = pairwise[1:2,i])
}

pairs2(cluster13, main="cluster13")

heatmaply_cor(fitLambda(cluster13, lower.tail  = FALSE), xlab = "Stations", 
              ylab = "Stations", dendrogram = "none", scale="none", main="lambda cluster13")
pairwise<-combn(colnames(cluster13), 2) #this create all the combinations of the stations (no repetitions)
heatmaply_cor(corKendall(cluster13), xlab = "Stations", 
              ylab = "Stations", dendrogram = "none", scale="none", main="tau cluster13")
par(mfrow=c(2,3))  #soluzione temporanea con più plot
for (i in 1:(length(pairwise)/2)){ 
  BiCopKPlot(clean_rain[,pairwise[1,i]],clean_rain[,pairwise[2,i]], main = pairwise[1:2,i])
}

pairs2(cluster14, main="cluster14")

heatmaply_cor(fitLambda(cluster14, lower.tail  = FALSE), xlab = "Stations", 
              ylab = "Stations", dendrogram = "none", scale="none", main="lambda cluster14")
pairwise<-combn(colnames(cluster14), 2) #this create all the combinations of the stations (no repetitions)
heatmaply_cor(corKendall(cluster14), xlab = "Stations", 
              ylab = "Stations", dendrogram = "none", scale="none", main="tau cluster14")
par(mfrow=c(2,3))  #soluzione temporanea con più plot
for (i in 1:(length(pairwise)/2)){ 
  BiCopKPlot(clean_rain[,pairwise[1,i]],clean_rain[,pairwise[2,i]], main = pairwise[1:2,i])
}

Clustering on Kendall tau

tau_matrix<-corKendall(clean_rain)

dissimilarity_diff_ken<- (2*(1-tau_matrix))^0.5

ken_clustering_diff_average<-hclust(d=as.dist(dissimilarity_diff_ken), method="average")
ken_clustering_diff_complete <- hclust(d=as.dist(dissimilarity_diff_ken), method = "complete")


plot(ken_clustering_diff_average, main="Average")

plot(ken_clustering_diff_complete, main="Complete")
abline(h=1.1, col="blue")

knitr::include_graphics("mappa_stazioni_rain_cluster_complete_Kendall.png")

cluster1<-clean_rain[,c("Enemonzo", "Tarvisio", "Gemona.del.Friuli", "Musi")]
cluster2<-clean_rain[,c("Talmassons", "Cividale.del.Friuli", "Udine.S.O.", "Palazzolo.dello.Stella","San.Vito.al.Tgl.", "Codroipo", "Cervignano.del.Friuli", "Brugnera", "Vivaro", "Fagagna")]
cluster3<-clean_rain[,c("Fossalon", "Sgonico.Zgonik", "Gradisca.d.Is.", "Capriva.del.Friuli")]


pairs2(cluster1, main="cluster1")

heatmaply_cor(fitLambda(cluster1, lower.tail  = FALSE), xlab = "Stations", 
              ylab = "Stations", dendrogram = "none", scale="none", main="lambda cluster1")
pairwise<-combn(colnames(cluster1), 2) #this create all the combinations of the stations (no repetitions)
par(mfrow=c(2,3))  #soluzione temporanea con più plot
heatmaply_cor(corKendall(cluster1), xlab = "Stations", 
              ylab = "Stations", dendrogram = "none", scale="none", main="tau cluster1")
for (i in 1:(length(pairwise)/2)){ 
  BiCopKPlot(clean_rain[,pairwise[1,i]],clean_rain[,pairwise[2,i]], main = pairwise[1:2,i])
}

pairs2(cluster2, main="cluster2")

heatmaply_cor(fitLambda(cluster2, lower.tail  = FALSE), xlab = "Stations", 
              ylab = "Stations", dendrogram = "none", scale="none", main="lambda cluster2")
pairwise<-combn(colnames(cluster2), 2) #this create all the combinations of the stations (no repetitions)
par(mfrow=c(2,3))  #soluzione temporanea con più plot
heatmaply_cor(corKendall(cluster2), xlab = "Stations", 
              ylab = "Stations", dendrogram = "none", scale="none", main="tau cluster2")
for (i in 1:(length(pairwise)/2)){ 
  BiCopKPlot(clean_rain[,pairwise[1,i]],clean_rain[,pairwise[2,i]], main = pairwise[1:2,i])
}

pairs2(cluster13, main="cluster3")

heatmaply_cor(fitLambda(cluster3, lower.tail  = FALSE), xlab = "Stations", 
              ylab = "Stations", dendrogram = "none", scale="none", main="lambda cluster3")
pairwise<-combn(colnames(cluster3), 2) #this create all the combinations of the stations (no repetitions)
heatmaply_cor(corKendall(cluster3), xlab = "Stations", 
              ylab = "Stations", dendrogram = "none", scale="none", main="tau cluster3")
par(mfrow=c(2,3))  #soluzione temporanea con più plot
for (i in 1:(length(pairwise)/2)){ 
  BiCopKPlot(clean_rain[,pairwise[1,i]],clean_rain[,pairwise[2,i]], main = pairwise[1:2,i])
}

Copula fitting on complete clusters (of upper Tail dependence)

Cluster 11

both exchangeable and radially symmetric

pairs2(cluster11, main="cluster11")

radSymTest(cluster11)$p.value
## Warning in radSymTest(cluster11): argument 'ties' set to TRUE
## [1] 0.4350649
exchTest(cluster11)$p.value
## Warning in exchTest(cluster11): argument 'ties' set to TRUE
## [1] 0.2032967
t.copula_cluster11 <- tCopula(dim = dim(cluster11)[2], dispstr = "un", df.fixed = TRUE)
gofCopula(t.copula_cluster11, x=cluster11, simulation="mult")  
## 
##  Multiplier bootstrap-based goodness-of-fit test of t-copula, dim. d =
##  2, with 'method'="Sn", 'estim.method'="mpl":
## 
## data:  x
## statistic = 0.015887, parameter = 0.75132, p-value = 0.496

Cluster 12

low p-value on radial symmetry.

pairs2(cluster12, main="cluster12")

radSymTest(cluster12)$p.value
## Warning in radSymTest(cluster12): argument 'ties' set to TRUE
## [1] 0.003496503
exchTest(cluster12)$p.value
## Warning in exchTest(cluster12): argument 'ties' set to TRUE
## [1] 0.6498501
#both exchangeable and radially symmetric

fit_cluster12 <- fitCopula(claytonCopula(dim = dim(cluster12)[2]),
                    data = cluster12, method = "mpl")  
gofCopula(fit_cluster12@copula, x = cluster12)
## Warning in .gofPB(copula, x, N = N, method = method, estim.method =
## estim.method, : argument 'ties' set to TRUE
## 
##  Parametric bootstrap-based goodness-of-fit test of Clayton copula, dim.
##  d = 5, with 'method'="Sn", 'estim.method'="mpl":
## 
## data:  x
## statistic = 0.22175, parameter = 1.7045, p-value = 0.002498

Cluster 13

both exchangeable and radially symmetric (but lowish p value on radial)

pairs2(cluster13, main="cluster13")

radSymTest(cluster13)$p.value
## Warning in radSymTest(cluster13): argument 'ties' set to TRUE
## [1] 0.07842158
exchTest(cluster13)$p.value
## Warning in exchTest(cluster13): argument 'ties' set to TRUE
## [1] 0.9275724
#both exchangeable and radially symmetric

t.copula_cluster13 <- tCopula(dim = dim(cluster13)[2], dispstr = "un", df.fixed = TRUE)
gofCopula(t.copula_cluster13, x=cluster13, simulation="mult")  
## 
##  Multiplier bootstrap-based goodness-of-fit test of t-copula, dim. d =
##  6, with 'method'="Sn", 'estim.method'="mpl":
## 
## data:  x
## statistic = 0.042934, parameter1 = 0.75121, parameter2 = 0.82238,
## parameter3 = 0.85020, parameter4 = 0.77869, parameter5 = 0.81826,
## parameter6 = 0.85999, parameter7 = 0.65849, parameter8 = 0.67274,
## parameter9 = 0.76231, parameter10 = 0.71793, parameter11 = 0.75000,
## parameter12 = 0.84544, parameter13 = 0.75894, parameter14 = 0.76397,
## parameter15 = 0.84009, p-value = 0.5939

Cluster 14

exchangeable (lowish p-value) but not radially symmetric

pairs2(cluster14, main="cluster14")

radSymTest(cluster14)$p.value
## Warning in radSymTest(cluster14): argument 'ties' set to TRUE
## [1] 0.004495504
exchTest(cluster14)$p.value
## Warning in exchTest(cluster14): argument 'ties' set to TRUE
## [1] 0.06643357
fit_cluster14 <- fitCopula(claytonCopula(dim = dim(cluster12)[2]),
                                data = cluster14, method = "mpl")  
gofCopula(fit_cluster14@copula, x=cluster14)  
## Warning in .gofPB(copula, x, N = N, method = method, estim.method =
## estim.method, : argument 'ties' set to TRUE
## Warning in fitCopula.ml(copula, u = data, method = method, start = start, :
## possible convergence problem: optim() gave code=52
## 
##  Parametric bootstrap-based goodness-of-fit test of Clayton copula, dim.
##  d = 5, with 'method'="Sn", 'estim.method'="mpl":
## 
## data:  x
## statistic = 0.27465, parameter = 1.7927, p-value = 0.0004995